## MAIN: Calibration plot
# Build calibration exhibits for the mortality prediction: observed death rate
# versus mean predicted risk within 20 quantile bins of the predicted risk.
#
# Writes four files to the current working directory (relative paths):
#   prediction_algo_perf_calib.csv / .pdf
#     general-population and cancer samples, one facet per sample
#   prediction_algo_perf_calib_Age_group_cancer.csv / .pdf
#     cancer sample only, one facet per age group
#
# Arguments:
#   data_cnr  Cancer-sample table. Must support data.table `[i, j, by]` syntax
#             and contain `prob_for_report`, `DMG_died_within_365d` and
#             `DMG_age_quintiles`.
#   data_all  General-population table with `prob_for_report` and
#             `DMG_died_within_365d`.
#   Both defaults refer to objects that are not defined in this function;
#   presumably they live in the global environment -- confirm with the caller.
#
# Returns:
#   The value of the final `dev.off()` call, as before.
#
# Notes:
#   * An outcome counts as a death when `DMG_died_within_365d == "1"`.
#   * `mean()` is called without `na.rm`, so an NA in a bin yields an NA rate.
#   * In the age-group table the bins are computed from the risk distribution
#     of the whole cancer sample, not re-computed within each age group.
do_Predictive_Model_Fit <- function(data_cnr = dt_for_exhibits_cancer,
                        data_all = dt_for_exhibits_all) {

  # Assign each predicted risk to one of 20 quantile bins (5% steps). The
  # labels "0.01", "0.06", ..., "0.96" are only bin identifiers.
  # Defined once so that all three tables share the same binning.
  risk_bins <- function(p) {
    gtools::quantcut(p,
                     seq(0, 1, 0.05),
                     right = TRUE,
                     labels = as.character(seq(0.01, 1, 0.05)))
  }

  # Calibration scatter with the 45-degree reference line; `facet_layer` is
  # the only part that differs between the two exhibits.
  calibration_plot <- function(points, facet_layer) {
    ggplot() +
      geom_point(data = points,
                 aes(x = pred_mortlity,
                     y = actual_rate),
                 size = 3) +
      geom_abline(intercept = 0, slope = 1, linetype = 3) +
      facet_layer +
      scale_x_continuous(breaks = seq(0, 1, 0.2)) +
      labs(x = "Average Predicted Mortality Risk",
           y = "Actual Mortality Rate",
           caption = "20 bins") +
      theme(aspect.ratio = 1)
  }

  # Print `plot` into a PDF at `path`. The device opened here is closed even
  # if printing fails, so an error cannot leave a dangling graphics device
  # that would capture later plots.
  print_to_pdf <- function(path, plot) {
    grDevices::pdf(path)
    dev_id <- grDevices::dev.cur()
    on.exit(
      if (dev_id %in% grDevices::dev.list()) grDevices::dev.off(dev_id),
      add = TRUE
    )
    print(plot)
    grDevices::dev.off(dev_id)
  }

  # Calibration by sample ----
  calib_all <- data_all[
    ,
    .(sample = "General Population Sample",
      actual_rate   = mean(DMG_died_within_365d == "1"),
      pred_mortlity = mean(prob_for_report)),
    by = .(bins = risk_bins(prob_for_report))
  ]
  calib_cnr <- data_cnr[
    ,
    .(sample = "Cancer Sample",
      actual_rate   = mean(DMG_died_within_365d == "1"),
      pred_mortlity = mean(prob_for_report)),
    by = .(bins = risk_bins(prob_for_report))
  ]
  dt_calib <- rbind(calib_all, calib_cnr)
  # Fix the facet order: general population first, cancer second.
  dt_calib[, sample := factor(sample,
                              levels = c("General Population Sample",
                                         "Cancer Sample"))]

  write.csv(dt_calib, "prediction_algo_perf_calib.csv")
  print_to_pdf("prediction_algo_perf_calib.pdf",
               calibration_plot(dt_calib, facet_grid(~sample)))

  # Calibration by age group (cancer sample only) ----
  calib_age <- data_cnr[
    ,
    .(sample = "Cancer Sample",
      actual_rate   = mean(DMG_died_within_365d == "1"),
      pred_mortlity = mean(prob_for_report)),
    by = .(age_group = DMG_age_quintiles,
           bins = risk_bins(prob_for_report))
  ]

  write.csv(calib_age, "prediction_algo_perf_calib_Age_group_cancer.csv")
  print_to_pdf("prediction_algo_perf_calib_Age_group_cancer.pdf",
               calibration_plot(calib_age, facet_wrap(~age_group)))
}
